{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false,
    "deletable": true,
    "editable": true
   },
   "outputs": [],
   "source": [
    "from tpot import TPOTClassifier\n",
    "from sklearn.datasets import load_iris\n",
    "from sklearn.model_selection import train_test_split"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "deletable": true,
    "editable": true
   },
   "source": [
    "Load the IRIS data set and explore its contents. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false,
    "deletable": true,
    "editable": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(array([[ 5.1,  3.5,  1.4,  0.2],\n",
       "        [ 4.9,  3. ,  1.4,  0.2],\n",
       "        [ 4.7,  3.2,  1.3,  0.2],\n",
       "        [ 4.6,  3.1,  1.5,  0.2],\n",
       "        [ 5. ,  3.6,  1.4,  0.2]]),\n",
       " array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
       "        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
       "        0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
       "        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
       "        1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n",
       "        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n",
       "        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]))"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "iris = load_iris()\n",
    "iris.data[0:5], iris.target"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "deletable": true,
    "editable": true
   },
   "source": [
    "Split the data set in train and test. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false,
    "deletable": true,
    "editable": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((112, 4), (38, 4), (112,), (38,))"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target,\n",
    "                                                    train_size=0.75, test_size=0.25)\n",
    "X_train.shape, X_test.shape, y_train.shape, y_test.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false,
    "deletable": true,
    "editable": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: xgboost.XGBClassifier is not available and will not be used by TPOT.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Optimization Progress: 100%|██████████| 200/200 [00:18<00:00,  9.33pipeline/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Generation 1 - Current best internal CV score: 0.9825757575757577\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Optimization Progress: 100%|██████████| 300/300 [00:28<00:00, 12.80pipeline/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Generation 2 - Current best internal CV score: 0.9825757575757577\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Optimization Progress: 100%|██████████| 400/400 [00:39<00:00, 11.16pipeline/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Generation 3 - Current best internal CV score: 0.990909090909091\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Optimization Progress: 100%|██████████| 500/500 [00:45<00:00, 17.87pipeline/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Generation 4 - Current best internal CV score: 0.990909090909091\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Optimization Progress: 100%|██████████| 600/600 [00:52<00:00, 16.63pipeline/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Generation 5 - Current best internal CV score: 0.990909090909091\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Optimization Progress: 100%|██████████| 700/700 [00:58<00:00, 17.42pipeline/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Generation 6 - Current best internal CV score: 0.990909090909091\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Optimization Progress: 100%|██████████| 800/800 [01:06<00:00, 14.45pipeline/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Generation 7 - Current best internal CV score: 0.990909090909091\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Optimization Progress: 100%|██████████| 900/900 [01:11<00:00, 12.45pipeline/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Generation 8 - Current best internal CV score: 0.990909090909091\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Optimization Progress: 100%|██████████| 1000/1000 [01:17<00:00,  8.87pipeline/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Generation 9 - Current best internal CV score: 0.990909090909091\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Optimization Progress: 100%|██████████| 1100/1100 [01:23<00:00, 11.12pipeline/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Generation 10 - Current best internal CV score: 0.990909090909091\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Optimization Progress: 100%|██████████| 1200/1200 [01:29<00:00,  9.37pipeline/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Generation 11 - Current best internal CV score: 0.990909090909091\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Optimization Progress: 100%|██████████| 1300/1300 [01:34<00:00,  9.55pipeline/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Generation 12 - Current best internal CV score: 0.990909090909091\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Optimization Progress: 100%|██████████| 1400/1400 [01:40<00:00, 16.09pipeline/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Generation 13 - Current best internal CV score: 0.990909090909091\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Optimization Progress: 100%|██████████| 1500/1500 [01:46<00:00,  9.96pipeline/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Generation 14 - Current best internal CV score: 0.990909090909091\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Optimization Progress: 100%|██████████| 1600/1600 [01:52<00:00, 10.02pipeline/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Generation 15 - Current best internal CV score: 0.990909090909091\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Optimization Progress: 100%|██████████| 1700/1700 [01:59<00:00,  7.28pipeline/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Generation 16 - Current best internal CV score: 0.990909090909091\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      "          \r",
      "\r",
      "          \r",
      "Optimization Progress:  95%|█████████▍| 1703/1800 [02:01<00:13,  7.28pipeline/s]\r",
      "          \r",
      "\r",
      "          \r",
      "Optimization Progress:  95%|█████████▍| 1703/1800 [02:01<00:13,  7.28pipeline/s]\r",
      "                                                                                "
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "TPOT closed prematurely. Will use the current best pipeline.\n",
      "\n",
      "Best pipeline: DecisionTreeClassifier(RBFSampler(input_matrix, RBFSampler__gamma=0.85), DecisionTreeClassifier__criterion=entropy, DecisionTreeClassifier__max_depth=3, DecisionTreeClassifier__min_samples_leaf=4, DecisionTreeClassifier__min_samples_split=9)\n",
      "0.973684210526\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r"
     ]
    }
   ],
   "source": [
    "tpot = TPOTClassifier(verbosity=2, max_time_mins=2)\n",
    "tpot.fit(X_train, y_train)\n",
    "print(tpot.score(X_test, y_test))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false,
    "deletable": true,
    "editable": true
   },
   "outputs": [],
   "source": [
    "tpot.export('tpot_iris_pipeline.py')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true,
    "deletable": true,
    "editable": true
   },
   "outputs": [],
   "source": [
    "# %load tpot_iris_pipeline.py\n",
    "import numpy as np\n",
    "\n",
    "from sklearn.kernel_approximation import RBFSampler\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.pipeline import make_pipeline\n",
    "from sklearn.tree import DecisionTreeClassifier\n",
    "\n",
    "# NOTE: Make sure that the class is labeled 'class' in the data file\n",
    "tpot_data = np.recfromcsv('PATH/TO/DATA/FILE', delimiter='COLUMN_SEPARATOR', dtype=np.float64)\n",
    "features = np.delete(tpot_data.view(np.float64).reshape(tpot_data.size, -1), tpot_data.dtype.names.index('class'), axis=1)\n",
    "training_features, testing_features, training_classes, testing_classes = \\\n",
    "    train_test_split(features, tpot_data['class'], random_state=42)\n",
    "\n",
    "exported_pipeline = make_pipeline(\n",
    "    RBFSampler(gamma=0.8500000000000001),\n",
    "    DecisionTreeClassifier(criterion=\"entropy\", max_depth=3, min_samples_leaf=4, min_samples_split=9)\n",
    ")\n",
    "\n",
    "exported_pipeline.fit(training_features, training_classes)\n",
    "results = exported_pipeline.predict(testing_features)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true,
    "deletable": true,
    "editable": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [default]",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
